// Texture sampled in main(); the .x channel is used as the depth value of each pixel.
uniform sampler2D depth;
// Upper limits compared against the reconstructed position's z in main() to pick
// a cascade: z < split0 -> cascade 0, z < split1 -> cascade 1, z < split2 -> cascade 2,
// z < splitmax -> cascade 3. Pixels with z >= splitmax contribute to no cascade.
uniform float split0;
uniform float split1;
uniform float split2;
uniform float splitmax;
// Matrix applied to the position (after InverseViewMatrix) before the bounds are taken.
// NOTE(review): presumably the sun/light camera transform - confirm against the host code.
uniform mat4 SunCamMatrix;

// Each work group runs 8x8 invocations.
layout (local_size_x = 8, local_size_y = 8) in;

// Integer min/max bounds along x, y and z for one cascade.
// Member order (xmin, xmax, ymin, ymax, zmin, zmax) defines the memory layout
// of the BoundingBoxes buffer and must not change.
struct CascadeBoundingBox
{
    int xmin, xmax;
    int ymin, ymax;
    int zmin, zmax;
};

// Output storage buffer: one bounding box per cascade (4 cascades).
// main() only merges into these values with atomicMin/atomicMax and never resets
// them, so the buffer has to hold suitable initial values before the dispatch
// (presumably written by the host - not visible in this file).
layout (std430) buffer BoundingBoxes
{
    CascadeBoundingBox BB[4];
};

// Declaration only; the definition is not in this file.
// main() calls it with vec3(uv, sampled depth) and InverseProjectionMatrix and uses
// the returned position's z for cascade selection.
// NOTE(review): presumably reconstructs a view-space position - confirm in the definition.
vec4 getPosFromUVDepth(vec3 uvDepth, mat4 InverseProjectionMatrix);

// Work-group-local bounds, one slot per cascade. Every invocation folds its
// private results into these with atomics before they are merged into BB.
shared int xmin[4], xmax[4];
shared int ymin[4], ymax[4];
shared int zmin[4], zmax[4];

// Computes integer bounding boxes of the visible pixels for each of the four cascades.
//
// Work layout: a work group has 8x8 invocations and every invocation reads 8x8
// samples, so one work group covers a 64x64 pixel tile.
//
// Reduction is done in three stages to limit contention:
//   1. each invocation accumulates min/max in private variables,
//   2. invocations merge into the shared per-work-group arrays with atomics,
//   3. invocation 0 merges the work group result into the BB storage buffer.
//
// Relies on `screen`, `InverseProjectionMatrix` and `InverseViewMatrix`, which are
// not declared in this file (presumably provided by a shared header - confirm).
void main()
{
    // The first four invocations reset the shared bounds, one cascade each.
    // +/-1000 act as "empty" sentinels for the min/max reductions.
    if (gl_LocalInvocationIndex < 4) {
        xmin[gl_LocalInvocationIndex] = ymin[gl_LocalInvocationIndex] = zmin[gl_LocalInvocationIndex] = 1000;
        xmax[gl_LocalInvocationIndex] = ymax[gl_LocalInvocationIndex] = zmax[gl_LocalInvocationIndex] = -1000;
    }

    // Make sure the shared bounds are initialised before anyone merges into them.
    barrier();

    // Private per-cascade bounds of this invocation, same sentinels as above.
    ivec3 lmax0 = ivec3(-1000);
    ivec3 lmin0 = ivec3(1000);
    ivec3 lmax1 = ivec3(-1000);
    ivec3 lmin1 = ivec3(1000);
    ivec3 lmax2 = ivec3(-1000);
    ivec3 lmin2 = ivec3(1000);
    ivec3 lmax3 = ivec3(-1000);
    ivec3 lmin3 = ivec3(1000);

    // First pixel of this invocation: its position inside the work group plus the
    // origin of the work group's tile (work group size * 8 samples per axis).
    vec2 start_xy = gl_LocalInvocationID.xy + gl_WorkGroupID.xy * gl_WorkGroupSize.xy * 8;
    // Successive samples of one invocation are one work group width apart, so the
    // 8x8 invocations interleave and together touch every pixel of the tile once.
    vec2 sample_stride = vec2(gl_WorkGroupSize.xy);
    for (int i = 0; i < 8; i++) {
        for (int j = 0; j < 8; j++) {
            vec2 uv = (start_xy + vec2(i, j) * sample_stride) / screen;
            float z = texture(depth, uv).x;
            vec4 xpos = getPosFromUVDepth(vec3(uv, z), InverseProjectionMatrix);
            vec4 lightcoord = InverseViewMatrix * xpos;
            lightcoord /= lightcoord.w;
            lightcoord = SunCamMatrix * lightcoord;
            lightcoord /= lightcoord.w;
            // Atomics need integers. The float -> int conversion drops the fraction
            // before the multiplication, so every stored value is a multiple of 4.
            // NOTE(review): confirm the consumer of BB expects this scale/precision.
            ivec3 lc = ivec3(lightcoord.xyz) * 4;

            // Pick the cascade from the z of the reconstructed position; pixels at
            // or beyond splitmax are ignored.
            if (xpos.z < split0) {
                lmax0 = max(lmax0, lc);
                lmin0 = min(lmin0, lc);
            } else if (xpos.z < split1) {
                lmax1 = max(lmax1, lc);
                lmin1 = min(lmin1, lc);
            } else if (xpos.z < split2) {
                lmax2 = max(lmax2, lc);
                lmin2 = min(lmin2, lc);
            } else if (xpos.z < splitmax) {
                lmax3 = max(lmax3, lc);
                lmin3 = min(lmin3, lc);
            }
        }
    }

    // Merge the private bounds into the shared per-work-group bounds. A cascade
    // that received no sample still holds the sentinels, which leave the shared
    // values untouched.
    atomicMax(xmax[0], lmax0.x);
    atomicMax(ymax[0], lmax0.y);
    atomicMax(zmax[0], lmax0.z);
    atomicMin(xmin[0], lmin0.x);
    atomicMin(ymin[0], lmin0.y);
    atomicMin(zmin[0], lmin0.z);

    atomicMax(xmax[1], lmax1.x);
    atomicMax(ymax[1], lmax1.y);
    atomicMax(zmax[1], lmax1.z);
    atomicMin(xmin[1], lmin1.x);
    atomicMin(ymin[1], lmin1.y);
    atomicMin(zmin[1], lmin1.z);

    atomicMax(xmax[2], lmax2.x);
    atomicMax(ymax[2], lmax2.y);
    atomicMax(zmax[2], lmax2.z);
    atomicMin(xmin[2], lmin2.x);
    atomicMin(ymin[2], lmin2.y);
    atomicMin(zmin[2], lmin2.z);

    atomicMax(xmax[3], lmax3.x);
    atomicMax(ymax[3], lmax3.y);
    atomicMax(zmax[3], lmax3.z);
    atomicMin(xmin[3], lmin3.x);
    atomicMin(ymin[3], lmin3.y);
    atomicMin(zmin[3], lmin3.z);

    // Wait until every invocation of the work group has contributed.
    barrier();

    // A single invocation publishes the work group result, so the storage buffer
    // sees one set of atomics per work group instead of one per invocation.
    if (gl_LocalInvocationIndex == 0) {
        for (int c = 0; c < 4; c++) {
            atomicMax(BB[c].xmax, xmax[c]);
            atomicMax(BB[c].ymax, ymax[c]);
            atomicMax(BB[c].zmax, zmax[c]);
            atomicMin(BB[c].xmin, xmin[c]);
            atomicMin(BB[c].ymin, ymin[c]);
            atomicMin(BB[c].zmin, zmin[c]);
        }
    }
}

